# Preview the first rows of the raw crime data
head(crime420co4)
##                category persistent_id    date      lat     long street_id
## 1 anti-social-behaviour               2023-01 51.88306 0.909136   2153366
## 2 anti-social-behaviour               2023-01 51.90124 0.901681   2153173
## 3 anti-social-behaviour               2023-01 51.88907 0.897722   2153077
## 4 anti-social-behaviour               2023-01 51.89122 0.901988   2153186
## 5 anti-social-behaviour               2023-01 51.89416 0.895433   2153012
## 6 anti-social-behaviour               2023-01 51.88050 0.909014   2153379
##                     street_name context        id location_type
## 1      On or near Military Road      NA 107596596         Force
## 2                   On or near       NA 107596646         Force
## 3 On or near Culver Street West      NA 107595950         Force
## 4       On or near Ryegate Road      NA 107595953         Force
## 5       On or near Market Close      NA 107595979         Force
## 6         On or near Lisle Road      NA 107595985         Force
##   location_subtype outcome_status
## 1                            <NA>
## 2                            <NA>
## 3                            <NA>
## 4                            <NA>
## 5                            <NA>
## 6                            <NA>
# Preview the first rows of the raw weather data
head(temp180co4)
##   station_ID       Date TemperatureCAvg TemperatureCMax TemperatureCMin TdAvgC
## 1       3590 2023-12-31             8.7            10.6             4.4    7.2
## 2       3590 2023-12-30             6.6             9.7             4.4    4.2
## 3       3590 2023-12-29             9.9            11.4             6.9    6.0
## 4       3590 2023-12-28             9.9            11.5             4.0    7.5
## 5       3590 2023-12-27             5.8            10.6             3.9    3.7
## 6       3590 2023-12-26             9.8            12.7             6.3    7.6
##   HrAvg WindkmhDir WindkmhInt WindkmhGust PresslevHp Precmm TotClOct lowClOct
## 1  89.6          S       25.0        63.0      999.0    6.2      8.0      8.0
## 2  85.5        WSW       22.7        50.0     1006.9    0.4      4.6      6.5
## 3  77.2         SW       32.8        61.2     1003.6    0.8      6.5      6.7
## 4  84.6        SSW       32.2        70.4     1003.2    2.8      6.8      7.1
## 5  86.4         SW       13.2        37.1     1016.4    2.0      4.0      6.9
## 6  86.9        WSW       23.5        46.3     1006.2    4.4      6.5      7.4
##   SunD1h VisKm PreselevHp SnowDepcm
## 1    0.0  26.3         NA        NA
## 2    1.1  48.3         NA        NA
## 3    0.1  26.7         NA        NA
## 4    0.0  25.1         NA        NA
## 5    3.2  30.1         NA        NA
## 6    0.0  45.8         NA        NA
library(stringr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(wordcloud2)
library(DT)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
# Inspect the column names and types of the raw crime data frame
str(crime420co4)
## 'data.frame':    6878 obs. of  12 variables:
##  $ category        : chr  "anti-social-behaviour" "anti-social-behaviour" "anti-social-behaviour" "anti-social-behaviour" ...
##  $ persistent_id   : chr  "" "" "" "" ...
##  $ date            : chr  "2023-01" "2023-01" "2023-01" "2023-01" ...
##  $ lat             : num  51.9 51.9 51.9 51.9 51.9 ...
##  $ long            : num  0.909 0.902 0.898 0.902 0.895 ...
##  $ street_id       : int  2153366 2153173 2153077 2153186 2153012 2153379 2153105 2153541 2152937 2153107 ...
##  $ street_name     : chr  "On or near Military Road" "On or near " "On or near Culver Street West" "On or near Ryegate Road" ...
##  $ context         : logi  NA NA NA NA NA NA ...
##  $ id              : int  107596596 107596646 107595950 107595953 107595979 107595985 107596603 107596291 107596305 107596453 ...
##  $ location_type   : chr  "Force" "Force" "Force" "Force" ...
##  $ location_subtype: chr  "" "" "" "" ...
##  $ outcome_status  : chr  NA NA NA NA ...
# Inspect the column names and types of the raw weather data frame
str(temp180co4)
## 'data.frame':    365 obs. of  18 variables:
##  $ station_ID     : int  3590 3590 3590 3590 3590 3590 3590 3590 3590 3590 ...
##  $ Date           : chr  "2023-12-31" "2023-12-30" "2023-12-29" "2023-12-28" ...
##  $ TemperatureCAvg: num  8.7 6.6 9.9 9.9 5.8 9.8 12.5 10 9.6 10 ...
##  $ TemperatureCMax: num  10.6 9.7 11.4 11.5 10.6 12.7 14.3 12 10.8 12.6 ...
##  $ TemperatureCMin: num  4.4 4.4 6.9 4 3.9 6.3 9.5 8.4 8.1 8.1 ...
##  $ TdAvgC         : num  7.2 4.2 6 7.5 3.7 7.6 10.1 7 6.5 6.2 ...
##  $ HrAvg          : num  89.6 85.5 77.2 84.6 86.4 86.9 85.3 81.5 81.2 78.2 ...
##  $ WindkmhDir     : chr  "S" "WSW" "SW" "SSW" ...
##  $ WindkmhInt     : num  25 22.7 32.8 32.2 13.2 23.5 34.1 32.7 34.1 37.5 ...
##  $ WindkmhGust    : num  63 50 61.2 70.4 37.1 46.3 72.3 61.2 68.6 77.8 ...
##  $ PresslevHp     : num  999 1007 1004 1003 1016 ...
##  $ Precmm         : num  6.2 0.4 0.8 2.8 2 4.4 0.8 0.8 0 2 ...
##  $ TotClOct       : num  8 4.6 6.5 6.8 4 6.5 7.8 5 8 7.5 ...
##  $ lowClOct       : num  8 6.5 6.7 7.1 6.9 7.4 7.8 6.7 8 7.5 ...
##  $ SunD1h         : num  0 1.1 0.1 0 3.2 0 0 2.9 0 1.4 ...
##  $ VisKm          : num  26.3 48.3 26.7 25.1 30.1 45.8 61.8 72.9 69.4 34.3 ...
##  $ PreselevHp     : logi  NA NA NA NA NA NA ...
##  $ SnowDepcm      : int  NA NA NA NA NA NA NA NA NA NA ...
# Count the NA cells across all columns of each raw data frame.
# Note: is.na() does not flag empty strings ("") such as those seen in
# persistent_id and location_subtype, so those are not included here.
sum(is.na(crime420co4))
## [1] 7555
sum(is.na(temp180co4))
## [1] 851
# ---- Clean the crime data ----
# Work on a copy so the raw data frame stays available for comparison
cleaned_crimeco4 <- crime420co4

# Logical mask of numeric columns (vapply guarantees a logical vector)
numeric_columns <- vapply(cleaned_crimeco4, is.numeric, logical(1))

# street_id and id are numeric but act as identifiers, not measurements;
# a mean of identifiers is meaningless, so they are never imputed
id_columns <- names(cleaned_crimeco4) %in% c("street_id", "id")
impute_columns <- numeric_columns & !id_columns

# Replace NA values in the remaining numeric columns with the column mean
# (rounded to 1 decimal). Columns without any NA are returned untouched so
# that their storage type is not changed by the assignment.
cleaned_crimeco4[impute_columns] <- lapply(
  cleaned_crimeco4[impute_columns],
  function(x) {
    if (!anyNA(x)) {
      return(x)
    }
    x[is.na(x)] <- round(mean(x, na.rm = TRUE), 1)
    x
  }
)

# Give missing outcomes an explicit label instead of NA
cleaned_crimeco4$outcome_status[is.na(cleaned_crimeco4$outcome_status)] <- "No Information"

# Normalise street names: lower-case, then strip surrounding whitespace
cleaned_crimeco4$street_name <- str_trim(str_to_lower(cleaned_crimeco4$street_name))

# Parse the "YYYY-MM" strings; ym() maps each one to the first day of that
# month, so the resulting Date column has month resolution only
cleaned_crimeco4$date <- ym(cleaned_crimeco4$date)

# Remove irrelevant columns (context, location_subtype)
cleaned_crimeco4 <- subset(cleaned_crimeco4, select = -c(context, location_subtype))

head(cleaned_crimeco4)
##                category persistent_id       date      lat     long street_id
## 1 anti-social-behaviour               2023-01-01 51.88306 0.909136   2153366
## 2 anti-social-behaviour               2023-01-01 51.90124 0.901681   2153173
## 3 anti-social-behaviour               2023-01-01 51.88907 0.897722   2153077
## 4 anti-social-behaviour               2023-01-01 51.89122 0.901988   2153186
## 5 anti-social-behaviour               2023-01-01 51.89416 0.895433   2153012
## 6 anti-social-behaviour               2023-01-01 51.88050 0.909014   2153379
##                     street_name        id location_type outcome_status
## 1      on or near military road 107596596         Force No Information
## 2                    on or near 107596646         Force No Information
## 3 on or near culver street west 107595950         Force No Information
## 4       on or near ryegate road 107595953         Force No Information
## 5       on or near market close 107595979         Force No Information
## 6         on or near lisle road 107595985         Force No Information
# ---- Clean the weather data ----
# Start from a copy so the raw weather data frame is left as loaded
cleaned_tempco4 <- temp180co4

# Flag which columns hold numeric values
numeric_columns_temp <- sapply(cleaned_tempco4, is.numeric)

# For every numeric column, fill each NA with that column's mean
# (computed without the NAs and rounded to 1 decimal place)
cleaned_tempco4[numeric_columns_temp] <- lapply(
  cleaned_tempco4[numeric_columns_temp],
  function(values) {
    fill_value <- round(mean(values, na.rm = TRUE), 1)
    missing <- is.na(values)
    ifelse(missing, fill_value, values)
  }
)

# Turn the "YYYY-MM-DD" strings into Date values
cleaned_tempco4[["Date"]] <- ymd(cleaned_tempco4[["Date"]])

# Drop the two columns that are not used in the analysis
cleaned_tempco4 <- cleaned_tempco4[
  ,
  !is.element(names(cleaned_tempco4), c("PreselevHp", "SnowDepcm"))
]

head(cleaned_tempco4)
##   station_ID       Date TemperatureCAvg TemperatureCMax TemperatureCMin TdAvgC
## 1       3590 2023-12-31             8.7            10.6             4.4    7.2
## 2       3590 2023-12-30             6.6             9.7             4.4    4.2
## 3       3590 2023-12-29             9.9            11.4             6.9    6.0
## 4       3590 2023-12-28             9.9            11.5             4.0    7.5
## 5       3590 2023-12-27             5.8            10.6             3.9    3.7
## 6       3590 2023-12-26             9.8            12.7             6.3    7.6
##   HrAvg WindkmhDir WindkmhInt WindkmhGust PresslevHp Precmm TotClOct lowClOct
## 1  89.6          S       25.0        63.0      999.0    6.2      8.0      8.0
## 2  85.5        WSW       22.7        50.0     1006.9    0.4      4.6      6.5
## 3  77.2         SW       32.8        61.2     1003.6    0.8      6.5      6.7
## 4  84.6        SSW       32.2        70.4     1003.2    2.8      6.8      7.1
## 5  86.4         SW       13.2        37.1     1016.4    2.0      4.0      6.9
## 6  86.9        WSW       23.5        46.3     1006.2    4.4      6.5      7.4
##   SunD1h VisKm
## 1    0.0  26.3
## 2    1.1  48.3
## 3    0.1  26.7
## 4    0.0  25.1
## 5    3.2  30.1
## 6    0.0  45.8
# Re-inspect the weather data frame after cleaning (column names and types)
str(cleaned_tempco4)
## 'data.frame':    365 obs. of  16 variables:
##  $ station_ID     : int  3590 3590 3590 3590 3590 3590 3590 3590 3590 3590 ...
##  $ Date           : Date, format: "2023-12-31" "2023-12-30" ...
##  $ TemperatureCAvg: num  8.7 6.6 9.9 9.9 5.8 9.8 12.5 10 9.6 10 ...
##  $ TemperatureCMax: num  10.6 9.7 11.4 11.5 10.6 12.7 14.3 12 10.8 12.6 ...
##  $ TemperatureCMin: num  4.4 4.4 6.9 4 3.9 6.3 9.5 8.4 8.1 8.1 ...
##  $ TdAvgC         : num  7.2 4.2 6 7.5 3.7 7.6 10.1 7 6.5 6.2 ...
##  $ HrAvg          : num  89.6 85.5 77.2 84.6 86.4 86.9 85.3 81.5 81.2 78.2 ...
##  $ WindkmhDir     : chr  "S" "WSW" "SW" "SSW" ...
##  $ WindkmhInt     : num  25 22.7 32.8 32.2 13.2 23.5 34.1 32.7 34.1 37.5 ...
##  $ WindkmhGust    : num  63 50 61.2 70.4 37.1 46.3 72.3 61.2 68.6 77.8 ...
##  $ PresslevHp     : num  999 1007 1004 1003 1016 ...
##  $ Precmm         : num  6.2 0.4 0.8 2.8 2 4.4 0.8 0.8 0 2 ...
##  $ TotClOct       : num  8 4.6 6.5 6.8 4 6.5 7.8 5 8 7.5 ...
##  $ lowClOct       : num  8 6.5 6.7 7.1 6.9 7.4 7.8 6.7 8 7.5 ...
##  $ SunD1h         : num  0 1.1 0.1 0 3.2 0 0 2.9 0 1.4 ...
##  $ VisKm          : num  26.3 48.3 26.7 25.1 30.1 45.8 61.8 72.9 69.4 34.3 ...
# Re-inspect the crime data frame after cleaning (column names and types)
str(cleaned_crimeco4)
## 'data.frame':    6878 obs. of  10 variables:
##  $ category      : chr  "anti-social-behaviour" "anti-social-behaviour" "anti-social-behaviour" "anti-social-behaviour" ...
##  $ persistent_id : chr  "" "" "" "" ...
##  $ date          : Date, format: "2023-01-01" "2023-01-01" ...
##  $ lat           : num  51.9 51.9 51.9 51.9 51.9 ...
##  $ long          : num  0.909 0.902 0.898 0.902 0.895 ...
##  $ street_id     : int  2153366 2153173 2153077 2153186 2153012 2153379 2153105 2153541 2152937 2153107 ...
##  $ street_name   : chr  "on or near military road" "on or near" "on or near culver street west" "on or near ryegate road" ...
##  $ id            : int  107596596 107596646 107595950 107595953 107595979 107595985 107596603 107596291 107596305 107596453 ...
##  $ location_type : chr  "Force" "Force" "Force" "Force" ...
##  $ outcome_status: chr  "No Information" "No Information" "No Information" "No Information" ...
# Confirm that no NA cells remain in either cleaned data frame
sum(is.na(cleaned_tempco4))
## [1] 0
sum(is.na(cleaned_crimeco4))
## [1] 0

Let's perform several analyses to understand the data and identify ways to improve the safety and well-being of the community.

# ---- Word cloud of crime categories ----
# Tabulate how many incidents fall into each category
crime_category_freq <- table(cleaned_crimeco4$category)

# Turn the table into a two-column data frame (one row per category) and
# give the columns descriptive names in the same step
crime_category_df <- setNames(
  as.data.frame(crime_category_freq),
  c("Crime_Category", "Frequency")
)

# Draw the word cloud from the category/frequency pairs
wordcloud2(crime_category_df, size = 1.2)
# ---- One-way table: count and share of incidents per category ----
category_table <- table(cleaned_crimeco4$category)
category_table_percentage <- 100 * prop.table(category_table)

category_table_df <- as.data.frame.table(category_table)
category_table_df[["Percentage"]] <- paste0(
  round(category_table_percentage, 2),
  "%"
)

# ---- Two-way table: category (rows) by street name (columns) ----
two_way_table_street <- table(
  cleaned_crimeco4$category,
  cleaned_crimeco4$street_name
)

# margin = 1 normalises within each row, so the percentages of one
# category add up to 100 across all streets
two_way_table_street_percentage <- 100 * prop.table(two_way_table_street, margin = 1)

# The long-format data frame and the percentage table share the same
# cell order, so the percentage vector lines up row for row
two_way_table_street_df <- as.data.frame(two_way_table_street)
two_way_table_street_df[["Percentage"]] <- paste0(
  round(two_way_table_street_percentage, 2),
  "%"
)

# Render both tables as interactive widgets
datatable(category_table_df, caption = "Frequency Table for Category")
datatable(two_way_table_street_df, caption = "Two-way Table for Category and Street Name")
# Load the plotly package
library(plotly)

# Make sure the frequency table is a plain data frame before plotting
category_table_df <- as.data.frame(category_table_df)

# Horizontal bar chart: one bar per category (Var1), bar length = count (Freq)
plot <- plot_ly(
  data = category_table_df,
  x = ~Freq,
  y = ~Var1,
  type = 'bar',
  orientation = 'h',
  name = "Category Frequency"
)

# Attach the chart title and axis titles
plot <- layout(
  plot,
  title = "Crime Category Frequency",
  xaxis = list(title = "Frequency"),
  yaxis = list(title = "Category")
)

# Show the chart
plot
# ---- Violent crime: share of incidents across the 10 busiest streets ----
# Keep only violent crime incidents
violent_crime_data <- cleaned_crimeco4 %>%
  filter(category == "violent-crime")

# Number of violent crime incidents per street, busiest street first
street_counts <- violent_crime_data %>%
  count(street_name, sort = TRUE)

# Keep exactly the 10 busiest streets. with_ties = FALSE matters here:
# the default keeps every street tied on the cut-off count, which can
# return more than 10 rows and contradict the chart title.
top_10_streets <- street_counts %>%
  slice_max(n, n = 10, with_ties = FALSE)

# Qualitative palette with one colour per slice (10 colours for 10 streets)
color_palette <- c(
  "#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00",
  "#ffff33", "#a65628", "#f781bf", "#999999", "#66c2a5"
)

# Pie chart without slice labels; street names appear in the legend/hover
pie_chart <- plot_ly(
  top_10_streets,
  labels = ~street_name,
  values = ~n,
  type = 'pie',
  textinfo = 'none',
  marker = list(colors = color_palette)
) %>%
  layout(title = "Distribution of Violent Crime Incidents for Top 10 Streets")

# plot_ly() already returns an interactive widget, so no ggplotly()
# conversion is needed; the name is kept for any later reference
pie_chart_interactive <- pie_chart

# Display the interactive pie chart
pie_chart_interactive
# Stacked bar chart: one bar per street, segments coloured by crime category
p <- ggplot(
  data = cleaned_crimeco4,
  mapping = aes(x = street_name, fill = category)
)
p <- p + geom_bar()

# Hide the x-axis tick labels
p <- p + theme(axis.text.x = element_blank())

# Turn the static ggplot into an interactive plotly widget and show it
p_interactive <- ggplotly(p)
p_interactive
# Fill colours made available to the outcome_status scale
custom_palette <- c(
  "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00",
  "#FFFF33", "#A65628", "#F781BF", "#999999", "#FFA500",
  "#DECF3F", "#0072B2", "#D55E00", "#CC79A7"
)

# Dodged bar chart: for each category, one bar per outcome status
bar_plot <- ggplot(
  data = cleaned_crimeco4,
  mapping = aes(x = category, fill = outcome_status)
)
bar_plot <- bar_plot +
  geom_bar(position = "dodge") +
  scale_x_discrete(labels = NULL) +             # no category tick labels
  scale_fill_manual(values = custom_palette) +  # custom fill colours
  guides(fill = "none") +                       # hide the legend
  labs(
    title = "Bar Plot of Category and Outcome Status",
    x = "Category",
    y = "Count",
    fill = "Outcome Status"
  ) +
  theme_minimal() +
  # must come after theme_minimal() so the panel colour is not reset
  theme(panel.background = element_rect(fill = "lightgray"))

# Convert to an interactive plotly widget and show it
bar_plot_interactive <- ggplotly(bar_plot)
bar_plot_interactive
# Restrict the data to violent crime incidents
violent_crimes <- filter(cleaned_crimeco4, category == "violent-crime")

# 2D density contours of incident date (x) against latitude (y)
plot <- ggplot(data = violent_crimes, mapping = aes(x = date, y = lat))
plot <- plot +
  geom_density_2d(alpha = 0.5) +
  theme_minimal() +
  # applied after theme_minimal() so the grey panel is kept
  theme(panel.background = element_rect(fill = "grey90")) +
  labs(
    title = "2D Density Plot of Violent Crimes",
    x = "Date",
    y = "Latitude"
  )

# Convert the ggplot into an interactive plotly widget and show it
plot_interactive <- ggplotly(plot)
plot_interactive
# Box plot of outcome_status (y) for each incident category (x).
# NOTE(review): outcome_status is a character column (see the str() output
# of cleaned_crimeco4), so the y aesthetic here is categorical. A box plot
# summarises a numeric variable; confirm this chart shows what is intended,
# or consider a count-based chart of category by outcome instead.
# Create a ggplot object for the box plot
g <- ggplot(cleaned_crimeco4, aes(x = factor(category), y = outcome_status))

# Add the box plot layer
g <- g + geom_boxplot(fill = "lightblue", color = "blue")  # Adjust box colors as needed

# Set labels and title (only the x-axis label is overridden; y keeps its default)
g <- g + labs(title = "Outcome Status of Incidents by Category",
              x = "Incident Category")

# Customize the plot appearance
# theme() follows theme_minimal() so these overrides are not reset by it
g <- g + theme_minimal() +
       theme(axis.text.x = element_text(angle = 45, hjust = 1),  # Rotate and align labels
             axis.text.y = element_blank(),  # Remove y-axis labels
             panel.background = element_rect(fill = "grey90"))  # Set grey color for inside of plot area

# Convert ggplot object to plotly object
# (g is overwritten: from here on it holds the plotly widget, not the ggplot)
g <- ggplotly(g)

# Display the interactive plot
g
# ---- Violent crime: latitude distribution per outcome status ----
# Keep only violent crime incidents
violent_crime_data <- cleaned_crimeco4 %>%
  filter(category == "violent-crime")

# One violin per outcome status showing the spread of incident latitudes
violin_plot <- ggplot(violent_crime_data, aes(x = outcome_status, y = lat, fill = outcome_status)) +
  geom_violin() +
  labs(title = "Outcome Status of Violent Crimes") +
  theme_minimal() +
  theme(axis.text.x = element_blank(),  # Remove x-axis labels
        axis.text.y = element_blank(),  # Remove y-axis labels
        legend.position = "none")  # Remove legend

# Convert the ggplot into an interactive plotly widget.
# ggplotly() has no `source_data` argument (an unknown argument is silently
# swallowed by `...` and has no effect), so none is passed.
violin_plot_interactive <- ggplotly(violin_plot)

# Display the interactive plot
violin_plot_interactive
library(viridis)
## Loading required package: viridisLite
# ---- Crime counts over time, one line per category ----
# `date` was parsed with ym() during cleaning, so it is already a Date and
# always the first day of a month: no re-parsing is needed, and every count
# below is a MONTHLY total (the variable name is kept for later code).
daily_crime_counts <- cleaned_crimeco4 %>%
  group_by(date, category) %>%
  summarise(count = n(), .groups = "drop")

# Get the number of unique categories
num_categories <- length(unique(daily_crime_counts$category))

# One viridis colour per category
custom_palette <- viridis(num_categories)

# Interactive time series: x = month, y = incidents, one trace per category
plot <- plot_ly(
  daily_crime_counts,
  x = ~date,
  y = ~count,
  color = ~category,
  colors = custom_palette,
  name = ~category,
  type = 'scatter',
  mode = 'lines'
) %>%
  layout(title = "Monthly Crime Counts by Category",
         xaxis = list(title = "Month"),
         yaxis = list(title = "Count"))

# Display the interactive plot
plot
# ---- Violent crime counts over time with a loess trend ----
# Keep only violent crime incidents. `date` is already a Date holding the
# first day of each month, so it needs no re-parsing.
violent_crimes <- cleaned_crimeco4 %>%
  filter(category == "violent-crime")

# Incidents per month (the data has month resolution; the variable name is
# kept for any later code). .groups = "drop" already returns an ungrouped
# result, so no extra ungroup() is needed.
daily_violent_crime_counts <- violent_crimes %>%
  group_by(date) %>%
  summarise(count = n(), .groups = "drop")

# Loess fit of the count against time; dates are converted to numbers
# because loess needs a numeric predictor
smoothed_count <- loess(count ~ as.numeric(date), data = daily_violent_crime_counts)

# Raw monthly counts plus the fitted loess values on the same axes.
# predict() without newdata returns the fitted value for each input row,
# so it lines up with the date column.
plot <- plot_ly(daily_violent_crime_counts, x = ~date, y = ~count,
                type = 'scatter', mode = 'lines', name = 'Monthly Counts') %>%
  add_trace(x = daily_violent_crime_counts$date, y = predict(smoothed_count),
            type = 'scatter', mode = 'lines', name = 'Smoothed') %>%
  layout(title = "Monthly Violent Crime Counts with Smoothing",
         xaxis = list(title = "Month"),
         yaxis = list(title = "Count"))

# Display the interactive plot
plot
# ---- Crime counts against temperature ----
# The crime data has month resolution (every date is the first day of its
# month) while the weather data is daily. Joining the two directly on the
# date would pair each month's crime count with the weather of one single
# day (the 1st), so the weather is averaged per month first.
# Note: every numeric column is averaged, including station_ID.
monthly_weather <- cleaned_tempco4 %>%
  mutate(date = floor_date(as.Date(Date), unit = "month")) %>%
  group_by(date) %>%
  summarise(across(where(is.numeric), ~ mean(.x, na.rm = TRUE)),
            .groups = "drop")

# Total number of crimes per month (variable name kept for later code)
daily_crime_counts <- cleaned_crimeco4 %>%
  group_by(date) %>%
  summarise(count = n(), .groups = "drop")

# One row per month: crime count plus that month's average weather
merged_data <- left_join(daily_crime_counts, monthly_weather, by = "date")

# Counts and temperatures live on very different scales, so temperature is
# drawn against its own right-hand axis instead of being flattened against
# the count axis
time_series_plot <- plot_ly(merged_data, x = ~date) %>%
  add_trace(y = ~count, mode = "lines", name = "Crime Counts",
            type = 'scatter', line = list(color = 'red')) %>%
  add_trace(y = ~TemperatureCAvg, mode = "lines", name = "Average Temperature",
            type = 'scatter', line = list(color = 'blue'), yaxis = "y2") %>%
  layout(title = "Crime Counts and Temperature Over Time",
         xaxis = list(title = "Month"),
         yaxis = list(title = "Crime Count"),
         yaxis2 = list(title = "Average Temperature (°C)",
                       overlaying = "y", side = "right"),
         legend = list(x = 1.1, y = 1),  # keep the legend clear of the right axis
         plot_bgcolor = "rgba(211,211,211,0.2)",  # light grey plotting area
         paper_bgcolor = "rgba(211,211,211,0.2)")  # light grey surrounding paper

time_series_plot
# Load the plotly library
library(plotly)

# ---- Temperature range in months with violent crime ----
# Keep only violent crime incidents (their `date` is the first day of the
# month the incident was recorded in)
violent_crimes <- cleaned_crimeco4 %>%
  filter(category == "violent-crime")

# Coldest and warmest reading within each calendar month. Aggregating the
# daily weather by month first is required: joining daily weather directly
# to the month-resolution crime dates would only ever pick up the readings
# of the 1st of each month.
monthly_temp_range <- cleaned_tempco4 %>%
  mutate(date = floor_date(as.Date(Date), unit = "month")) %>%
  group_by(date) %>%
  summarise(min_temp = min(TemperatureCMin, na.rm = TRUE),
            max_temp = max(TemperatureCMax, na.rm = TRUE),
            .groups = "drop")

# One row per month that has at least one violent crime and weather data
crime_weather_data <- violent_crimes %>%
  distinct(date) %>%
  inner_join(monthly_temp_range, by = "date") %>%
  arrange(date)

# Scatter plot: monthly minimum (blue) and maximum (red) temperature
scatter_plot <- plot_ly(data = crime_weather_data, x = ~date) %>%
  add_trace(y = ~min_temp, name = "Minimum Temperature", type = 'scatter',
            mode = 'markers', marker = list(color = 'blue')) %>%
  add_trace(y = ~max_temp, name = "Maximum Temperature", type = 'scatter',
            mode = 'markers', marker = list(color = 'red')) %>%
  layout(title = "Minimum and Maximum Temperature During Violent Crimes",
         xaxis = list(title = "Month"),
         yaxis = list(title = "Temperature (°C)", side = "left"),
         legend = list(x = 1.05, y = 1, bgcolor = "rgba(255, 255, 255, 0.5)"),  # legend to the right of the plot
         margin = list(r = 150),  # room on the right for the legend
         plot_bgcolor = "rgba(211,211,211,0.2)",  # light grey plotting area
         paper_bgcolor = "rgba(211,211,211,0.2)")  # light grey surrounding paper

# Always show the plotly mode bar
scatter_plot <- scatter_plot %>% config(displayModeBar = TRUE)

# Display the interactive plot
scatter_plot
# ---- Correlation between monthly crime counts and weather ----
# Step 1: Prepare the data
# Select relevant weather variables from cleaned_tempco4
weather_data <- cleaned_tempco4[, c("Date", "TemperatureCAvg", "Precmm", "SunD1h")]

# The crime dates have month resolution (always the 1st of the month), so
# the daily weather is averaged per month before joining; a direct join on
# the date would only use the weather of the first day of each month
monthly_weather_data <- weather_data %>%
  mutate(date = floor_date(as.Date(Date), unit = "month")) %>%
  group_by(date) %>%
  summarise(across(c(TemperatureCAvg, Precmm, SunD1h),
                   ~ mean(.x, na.rm = TRUE)),
            .groups = "drop")

# Number of crime incidents per month
crime_count <- cleaned_crimeco4 %>%
  group_by(date) %>%
  summarise(crime_count = n(), .groups = "drop")

# One row per month: crime count plus average weather
merged_data <- left_join(crime_count, monthly_weather_data, by = "date")

# Step 2: Calculate correlation coefficients (all columns except the date)
correlation_matrix <- cor(
  merged_data[, setdiff(names(merged_data), "date")],
  use = "complete.obs"
)

# Step 3: Visualize the correlation coefficients.
# x/y carry the variable names so the cells can be read off the axes, and
# the colour scale is pinned to the full correlation range [-1, 1].
heatmap_plot <- plot_ly(
  x = colnames(correlation_matrix),
  y = rownames(correlation_matrix),
  z = correlation_matrix,
  type = "heatmap",
  colorscale = "Viridis",
  zmin = -1,
  zmax = 1
) %>%
  layout(title = "Correlation Between Crime Incidents and Weather Variables",
         xaxis = list(title = "Variable"),
         yaxis = list(title = "Variable"),
         margin = list(l = 100, b = 100))  # Adjust margins for better display

# Step 4: Identify significant correlations
# Analyze the heatmap visually to identify strong positive or negative correlations

# Display the interactive heatmap
heatmap_plot
# ---- Correlation between monthly violent crime counts and weather ----
# Step 1: Prepare the data
# Filter crime data to include only violent crime incidents
violent_crime_data <- cleaned_crimeco4 %>%
  filter(category == "violent-crime")

# Number of violent crime incidents per month
violent_crime_count <- violent_crime_data %>%
  group_by(date) %>%
  summarise(violent_crime_count = n(), .groups = "drop")

# Select relevant weather variables from cleaned_tempco4
weather_data <- cleaned_tempco4[, c("Date", "TemperatureCAvg", "Precmm", "SunD1h")]

# The crime dates have month resolution (always the 1st of the month), so
# the daily weather is averaged per month before joining; a direct join on
# the date would only use the weather of the first day of each month
monthly_weather_data <- weather_data %>%
  mutate(date = floor_date(as.Date(Date), unit = "month")) %>%
  group_by(date) %>%
  summarise(across(c(TemperatureCAvg, Precmm, SunD1h),
                   ~ mean(.x, na.rm = TRUE)),
            .groups = "drop")

# One row per month: violent crime count plus average weather
merged_data <- left_join(violent_crime_count, monthly_weather_data, by = "date")

# Step 2: Calculate correlation coefficients (all columns except the date)
correlation_matrix <- cor(
  merged_data[, setdiff(names(merged_data), "date")],
  use = "complete.obs"
)

# Step 3: Visualize the correlation coefficients.
# x/y carry the variable names so the cells can be read off the axes, and
# the colour scale is pinned to the full correlation range [-1, 1].
heatmap_plot <- plot_ly(
  x = colnames(correlation_matrix),
  y = rownames(correlation_matrix),
  z = correlation_matrix,
  type = "heatmap",
  colorscale = "Portland",
  zmin = -1,
  zmax = 1
) %>%
  layout(title = "Correlation Between Violent Crime Incidents and Weather Variables",
         xaxis = list(title = "Variable"),
         yaxis = list(title = "Variable"),
         margin = list(l = 100, b = 100))  # Adjust margins for better display

# Step 4: Identify significant correlations
# Analyze the heatmap visually to identify strong positive or negative correlations

# Display the interactive heatmap
heatmap_plot
# ---- Daily weather over the year ----
# These panels show weather only, so they are drawn straight from the daily
# weather data. Joining onto the month-resolution crime counts first would
# reduce every series to a single point per month (the 1st).
# Rows are sorted by date so the lines are drawn in chronological order.
weather_daily <- cleaned_tempco4 %>%
  mutate(Date = as.Date(Date)) %>%
  arrange(Date)

# Create line plots for each weather variable
temp_plot <- plot_ly(weather_daily, x = ~Date) %>%
  add_trace(y = ~TemperatureCAvg, name = "Average Temperature", type = 'scatter', mode = 'lines', line = list(color = 'blue')) %>%
  layout(title = "Average Temperature Over Time",
         xaxis = list(title = "Date"),
         yaxis = list(title = "Average Temperature (°C)"))

precipitation_plot <- plot_ly(weather_daily, x = ~Date) %>%
  add_trace(y = ~Precmm, name = "Precipitation", type = 'scatter', mode = 'lines', line = list(color = 'green')) %>%
  layout(title = "Precipitation Over Time",
         xaxis = list(title = "Date"),
         yaxis = list(title = "Precipitation (mm)"))

sunlight_plot <- plot_ly(weather_daily, x = ~Date) %>%
  add_trace(y = ~SunD1h, name = "Sunlight Hours", type = 'scatter', mode = 'lines', line = list(color = 'orange')) %>%
  layout(title = "Sunlight Hours Over Time",
         xaxis = list(title = "Date"),
         yaxis = list(title = "Sunlight Hours"))

# Stack the three panels on a shared date axis. titleY = TRUE keeps each
# panel's y-axis title. The result is stored under a name that does not
# shadow plotly's subplot() function.
weather_subplot <- subplot(temp_plot, precipitation_plot, sunlight_plot,
                           nrows = 3, shareX = TRUE, titleY = TRUE) %>%
  layout(title = "Daily Weather Over Time")

# Always show the plotly mode bar
weather_subplot <- weather_subplot %>% config(displayModeBar = TRUE)

# Display the subplot
weather_subplot
library(MASS)  # Load the MASS package for kde2d function
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
# ---- Spatial density of crime incidents ----
# Keep only incidents that have both coordinates
crime_data <- cleaned_crimeco4 %>%
  filter(!is.na(lat) & !is.na(long))

# 2D kernel density estimate over (longitude, latitude).
# kde2d() returns the grid coordinates in $x and $y and the density in $z,
# with rows of $z following x (longitude) and columns following y (latitude).
density <- kde2d(crime_data$long, crime_data$lat)

# Heatmap of the estimated density.
# - x/y pass the real grid coordinates so the axes show longitude/latitude
#   instead of grid cell indices.
# - plotly heatmaps expect rows of z to follow y, the opposite of kde2d's
#   layout, so the matrix is transposed.
density_plot <- plot_ly(
  x = density$x,
  y = density$y,
  z = t(density$z),
  type = "heatmap",
  colorscale = "Viridis",
  zauto = FALSE,
  zmin = 0,
  zmax = max(density$z)
) %>%
  layout(title = "Point Density Plot of Crime Incidents",
         xaxis = list(title = "Longitude"),
         yaxis = list(title = "Latitude"))

# Display the interactive point density plot
density_plot
# Map dates to meteorological seasons (northern hemisphere).
#
# Arguments:
#   date  A date-like vector of any length that month() accepts.
#
# Returns:
#   A character vector the same length as `date` with one of "Winter"
#   (Dec-Feb), "Spring" (Mar-May), "Summer" (Jun-Aug) or "Fall" (Sep-Nov)
#   per element. A missing date yields NA rather than a season.
#
# The function is vectorised, so it can be called on a whole column at once;
# calling it element by element (e.g. through sapply()) keeps working.
get_season <- function(date) {
  # Season of each calendar month, indexed by month number (1 = January)
  season_by_month <- c(
    "Winter", "Winter",
    "Spring", "Spring", "Spring",
    "Summer", "Summer", "Summer",
    "Fall", "Fall", "Fall",
    "Winter"
  )
  # Indexing with NA gives NA, so missing dates are not labelled "Winter"
  season_by_month[month(date)]
}

# ---- Crime by season ----
# Tag every incident with the season of its date. vapply() guarantees a
# character vector with exactly one season per row.
cleaned_crimeco4 <- cleaned_crimeco4 %>%
  mutate(season = factor(vapply(date, get_season, character(1))))

# Total number of incidents per season
crime_counts_season <- cleaned_crimeco4 %>%
  group_by(season) %>%
  summarise(crime_count = n(), .groups = "drop")

# Average weather per season
weather_season <- cleaned_tempco4 %>%
  mutate(season = factor(vapply(Date, get_season, character(1)))) %>%
  group_by(season) %>%
  summarise(avg_temp = mean(TemperatureCAvg, na.rm = TRUE),
            avg_precipitation = mean(Precmm, na.rm = TRUE),
            avg_sunshine = mean(SunD1h, na.rm = TRUE),
            .groups = "drop")

# A box plot needs several observations per box. The seasonal totals above
# give exactly one value per season (each box would collapse to a line), so
# the boxes are drawn from the monthly counts within each season instead.
monthly_counts_season <- cleaned_crimeco4 %>%
  group_by(season, date) %>%
  summarise(crime_count = n(), .groups = "drop")

# Interactive box plot: distribution of monthly crime counts per season
crime_box_plot <- plot_ly(data = monthly_counts_season, x = ~season, y = ~crime_count, type = "box") %>%
  layout(title = "Crime Incidents by Season",
         xaxis = list(title = "Season"),
         yaxis = list(title = "Monthly Crime Count"))

# Display the interactive plot
crime_box_plot
# ---- Violent crime by season ----
# Keep the violent crime incidents and label each with the season of its date
violent_crimes <- cleaned_crimeco4 %>%
  filter(category == "violent-crime") %>%
  mutate(season = factor(sapply(date, get_season)))

# Number of violent crime incidents in each season
violent_crime_counts_season <- summarise(
  group_by(violent_crimes, season),
  violent_crime_count = n()
)

# Bar chart: one bar per season, bar height = number of violent crimes
violent_crime_bar_plot <- plot_ly(
  data = violent_crime_counts_season,
  x = ~season,
  y = ~violent_crime_count,
  type = "bar"
)
violent_crime_bar_plot <- layout(
  violent_crime_bar_plot,
  title = "Violent Crime Incidents by Season",
  xaxis = list(title = "Season"),
  yaxis = list(title = "Violent Crime Count")
)

# Show the chart
violent_crime_bar_plot
# List the columns of the cleaned crime data (now including `season`)
colnames(cleaned_crimeco4)
##  [1] "category"       "persistent_id"  "date"           "lat"           
##  [5] "long"           "street_id"      "street_name"    "id"            
##  [9] "location_type"  "outcome_status" "season"
# List the columns of the cleaned weather data
colnames(cleaned_tempco4)
##  [1] "station_ID"      "Date"            "TemperatureCAvg" "TemperatureCMax"
##  [5] "TemperatureCMin" "TdAvgC"          "HrAvg"           "WindkmhDir"     
##  [9] "WindkmhInt"      "WindkmhGust"     "PresslevHp"      "Precmm"         
## [13] "TotClOct"        "lowClOct"        "SunD1h"          "VisKm"
library(leaflet)

# ---- Interactive map of crime incidents ----
# Path to the marker icon. This is an absolute path on one machine, so the
# file may be missing elsewhere; that case is handled below.
icon_path <- "/Users/nithyashree/Downloads/icons8-high-risk-16.png"

# Use the custom 16x16 icon when the file is available; otherwise warn and
# fall back to leaflet's default marker (icon = NULL) instead of producing
# a map with broken marker images
customIcon <- if (file.exists(icon_path)) {
  makeIcon(
    iconUrl = icon_path,  # Local file path to the icon
    iconWidth = 16,       # Icon width
    iconHeight = 16       # Icon height
  )
} else {
  warning("Icon file not found: ", icon_path, "; using default markers",
          call. = FALSE)
  NULL
}

# Map with one marker per incident; the popup shows category and date
crime_map <- leaflet() %>%
  addTiles() %>%
  addMarkers(
    data = cleaned_crimeco4,
    lng = ~long,
    lat = ~lat,
    icon = customIcon,  # Custom icon, or NULL for the default marker
    popup = ~paste("Category: ", category, "<br>Date: ", date)  # Specify popup content
  )

# Save the map as an HTML file. saveWidget() lives in the htmlwidgets
# package, which is not attached by this script, so it is called with an
# explicit namespace.
htmlwidgets::saveWidget(crime_map, file = "crime_map.html")